Confirmatory Factor Analysis and Structural Equation Models

Work in Progress

cfa
sem
measurement
# Package setup: lavaan/semPlot for SEM, lme4 for mixed models,
# marginaleffects/modelsummary for reporting, reticulate for the PyMC section.
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)
# Eagerly import PyMC in the embedded Python session so later chunks can use `pm`.
reticulate::py_run_string("import pymc as pm")

# NOTE(review): machine-specific Python path — assumes the author's conda env;
# adjust or remove when rendering elsewhere.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
# Route all modelsummary tables through tinytable (needed for style_tt below).
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)

# Register reticulate as the knitr engine for Python chunks.
knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)      # suppress scientific notation in printed output
set.seed(130)            # fixed seed: the noise injection below is reproducible
Warning

This is a work in progress. We intend to elaborate the choices related to analysing and understanding the data elicited through intentional psychometric surveys

Measurement and Measurement Constructs

# Load the survey data and deliberately inflate the `ls_p3` item for the
# "west" region, so downstream models have a region effect to discover.
df <- read.csv('sem_data.csv')
inflate <- df$region == 'west'
noise <- rnorm(sum(inflate), 0.5, 1) # generate the noise to add
df$ls_p3[inflate] <- df$ls_p3[inflate] + noise

# Two candidate aggregate outcomes over the three life-satisfaction items:
# a sum score and a mean score.
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[c('ls_p1', 'ls_p2', 'ls_p3')])

head(df) |> kable()
ID region gender age se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p2 sup_parents_p3 ls_p1 ls_p2 ls_p3 ls_sum ls_mean
1 west female 13 4.857143 5.571429 4.500000 5.80 5.500000 5.40 6.5 6.5 7.0 7.0 7.0 6.0 5.333333 6.75 7.647112 19.73044 6.576815
2 west male 14 4.571429 4.285714 4.666667 5.00 5.500000 4.80 4.5 4.5 5.5 5.0 6.0 4.5 4.333333 5.00 4.469623 13.80296 4.600985
10 west female 14 4.142857 6.142857 5.333333 5.20 4.666667 6.00 4.0 4.5 3.5 7.0 7.0 6.5 6.333333 5.50 4.710020 16.54335 5.514451
11 west female 14 5.000000 5.428571 4.833333 6.40 5.833333 6.40 7.0 7.0 7.0 7.0 7.0 7.0 4.333333 6.50 5.636198 16.46953 5.489844
12 west female 14 5.166667 5.600000 4.800000 5.25 5.400000 5.25 7.0 7.0 7.0 6.5 6.5 7.0 5.666667 6.00 5.266592 16.93326 5.644419
14 west male 14 4.857143 4.857143 4.166667 5.20 5.000000 4.20 5.5 6.5 7.0 6.5 6.5 6.5 5.000000 5.50 5.913709 16.41371 5.471236

Candidate Structure

#| code-fold: true
#| code-summary: "Show the code"

# Skim table with colour-coded row groups: observed indicators (teal, rows
# 2:14), life-satisfaction items (light blue, 15:17), derived scores
# (darker blue, 18:19). The three styled ranges do not overlap.
skim_tbl <- datasummary_skim(df)
skim_tbl |>
  style_tt(
    i = 2:14,
    j = 1:1,
    background = "#17C2AD",
    color = "white",
    italic = TRUE) |>
  style_tt(
    i = 15:17,
    j = 1:1,
    background = "#20AACC",
    color = "white",
    italic = TRUE) |>
  style_tt(
    i = 18:19,
    j = 1:1,
    background = "#2888A0",
    color = "white",
    italic = TRUE)
tinytable_ftyu5q15n5ibjk84ch05
Unique Missing Pct. Mean SD Min Median Max Histogram
ID 283 0 187.9 106.3 1.0 201.0 367.0
age 5 0 14.7 0.8 13.0 15.0 17.0
se_acad_p1 32 0 5.2 0.8 3.1 5.1 7.0
se_acad_p2 36 0 5.3 0.7 3.1 5.4 7.0
se_acad_p3 29 0 5.2 0.8 2.8 5.2 7.0
se_social_p1 24 0 5.3 0.8 1.8 5.4 7.0
se_social_p2 27 0 5.5 0.7 2.7 5.5 7.0
se_social_p3 31 0 5.4 0.8 3.0 5.5 7.0
sup_friends_p1 13 0 5.8 1.1 1.0 6.0 7.0
sup_friends_p2 10 0 6.0 0.9 2.5 6.0 7.0
sup_friends_p3 13 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p1 11 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p2 11 0 5.9 1.1 2.0 6.0 7.0
sup_parents_p3 13 0 5.7 1.1 1.0 6.0 7.0
ls_p1 15 0 5.2 0.9 2.0 5.3 7.0
ls_p2 21 0 5.8 0.7 2.5 5.8 7.0
ls_p3 161 0 5.5 1.1 1.7 5.6 9.6
ls_sum 218 0 16.5 2.2 8.2 16.7 21.0
ls_mean 217 0 5.5 0.7 2.7 5.6 7.0
N %
region east 142 50.2
west 141 49.8
gender female 132 46.6
male 151 53.4
# Names of all fifteen observed indicator columns used in the factor models:
# three items per construct, plus the three life-satisfaction items.
drivers <- c(
  'se_acad_p1', 'se_acad_p2', 'se_acad_p3',
  'se_social_p1', 'se_social_p2', 'se_social_p3',
  'sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3',
  'sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3',
  'ls_p1', 'ls_p2', 'ls_p3'
)



# Plot a labelled heatmap of a square covariance/correlation-style matrix.
#
# df: matrix or data.frame of values to display.
# title, subtitle: passed to ggtitle().
# Returns a ggplot object; values are rounded to 2 decimals for the labels.
plot_heatmap <- function(df, title = "Sample Covariances", subtitle = "Observed Measures") {
  heat_df <- df |> as.matrix() |> melt()
  colnames(heat_df) <- c("x", "y", "value")
  heat_df |>
    mutate(value = round(value, 2)) |>
    ggplot(aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(
      high = 'dodgerblue4',
      mid = 'white',
      low = 'firebrick2'
    ) +
    theme(axis.text.x = element_text(angle = 45)) +
    ggtitle(title, subtitle)
}

# Sample covariance and correlation heatmaps of the observed indicators.
g1 <- plot_heatmap(cov(df[, drivers]))

g2 <- plot_heatmap(cor(df[, drivers]), "Sample Correlations")

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

Fit Initial Regression Models

# Regression formulas with progressively more indicators per construct:
# first item only, first two items, then all three. Each is fit against
# both candidate outcomes (sum score and mean score).
formula_sum_1st <- " ls_sum ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"
formula_mean_1st <- " ls_mean ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"

formula_sum_12 <- " ls_sum ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"
formula_mean_12 <- " ls_mean ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"

formula_sum <- " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean <- " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"

# Fit all six ordinary least-squares models.
mod_sum <- lm(formula_sum, df)
mod_sum_1st <- lm(formula_sum_1st, df)
mod_sum_12 <- lm(formula_sum_12, df)
mod_mean <- lm(formula_mean, df)
mod_mean_1st <- lm(formula_mean_1st, df)
mod_mean_12 <- lm(formula_mean_12, df)

# Rescale a numeric vector to the [0, 1] interval.
#
# x: numeric vector to rescale.
# na.rm: drop NA values when computing the range. Default FALSE preserves
#   the original behaviour (any NA makes the whole result NA).
# Returns a vector the same length as x. Note: a constant input yields
# NaN (0 / 0), matching the original implementation.
min_max_norm <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}

# Min-max normalise the fifteen indicator columns (previously referenced
# by magic positions 5:19; `drivers` names the same columns explicitly)
# and re-attach the unscaled outcomes.
df_norm <- as.data.frame(lapply(df[drivers], min_max_norm))

df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean

# Refit the full models on the normalised predictors.
mod_sum_norm <- lm(formula_sum, df_norm)
mod_mean_norm <- lm(formula_mean, df_norm)

# Group the fitted models by outcome for a side-by-side modelsummary table.
models <- list(
  "Outcome: sum_score" = list(
    "model_sum_1st_factors" = mod_sum_1st,
    "model_sum_1st_2nd_factors" = mod_sum_12,
    "model_sum_score" = mod_sum,
    "model_sum_score_norm" = mod_sum_norm
  ),
  "Outcome: mean_score" = list(
    "model_mean_1st_factors" = mod_mean_1st,
    "model_mean_1st_2nd_factors" = mod_mean_12,
    "model_mean_score" = mod_mean,
    "model_mean_score_norm" = mod_mean_norm
  )
)
#| code-fold: true
#| code-summary: "Show the code"

# Summary table across all eight models; coefficient rows (2:25) are
# highlighted in teal via tinytable styling.
summary_tbl <- modelsummary(models, stars = TRUE, shape = "cbind")
style_tt(
  summary_tbl,
  i = 2:25,
  j = 1:1,
  background = "#17C2AD",
  color = "white",
  italic = TRUE
)
tinytable_jb9uvnwer8qg2neciem3
Outcome: sum_score Outcome: mean_score
model_sum_1st_factors model_sum_1st_2nd_factors model_sum_score model_sum_score_norm model_mean_1st_factors model_mean_1st_2nd_factors model_mean_score model_mean_score_norm
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.335*** 4.038*** 3.709*** 9.081*** 2.112*** 1.346*** 1.236*** 3.027***
(0.979) (1.080) (1.071) (0.739) (0.326) (0.360) (0.357) (0.246)
se_acad_p1 0.231 -0.023 -0.068 -0.264 0.077 -0.008 -0.023 -0.088
(0.158) (0.197) (0.216) (0.833) (0.053) (0.066) (0.072) (0.278)
se_social_p1 1.039*** 0.515* 0.401+ 2.085+ 0.346*** 0.172* 0.134+ 0.695+
(0.175) (0.224) (0.224) (1.167) (0.058) (0.075) (0.075) (0.389)
sup_friends_p1 0.069 -0.263+ -0.273 -1.636 0.023 -0.088+ -0.091 -0.545
(0.095) (0.145) (0.169) (1.011) (0.032) (0.048) (0.056) (0.337)
sup_parents_p1 0.518*** 0.196 0.050 0.299 0.173*** 0.065 0.017 0.100
(0.107) (0.155) (0.161) (0.964) (0.036) (0.052) (0.054) (0.321)
se_acad_p2 0.415+ 0.382+ 1.475+ 0.138+ 0.127+ 0.492+
(0.216) (0.227) (0.875) (0.072) (0.076) (0.292)
se_social_p2 0.662** 0.483+ 2.093+ 0.221** 0.161+ 0.698+
(0.233) (0.246) (1.065) (0.078) (0.082) (0.355)
sup_friends_p2 0.358* 0.366* 1.647* 0.119* 0.122* 0.549*
(0.172) (0.180) (0.808) (0.057) (0.060) (0.269)
sup_parents_p2 0.375* 0.166 0.829 0.125* 0.055 0.276
(0.151) (0.162) (0.808) (0.050) (0.054) (0.269)
se_acad_p3 -0.072 -0.302 -0.024 -0.101
(0.196) (0.815) (0.065) (0.272)
se_social_p3 0.350+ 1.400+ 0.117+ 0.467+
(0.180) (0.721) (0.060) (0.240)
sup_friends_p3 0.056 0.334 0.019 0.111
(0.146) (0.878) (0.049) (0.293)
sup_parents_p3 0.452** 2.710** 0.151** 0.903**
(0.141) (0.847) (0.047) (0.282)
Num.Obs. 283 283 283 283 283 283 283 283
R2 0.332 0.389 0.420 0.420 0.332 0.389 0.420 0.420
R2 Adj. 0.323 0.371 0.394 0.394 0.323 0.371 0.394 0.394
AIC 1134.2 1117.0 1110.3 1110.3 512.4 495.2 488.5 488.5
BIC 1156.1 1153.5 1161.3 1161.3 534.2 531.7 539.5 539.5
Log.Lik. -561.090 -548.507 -541.139 -541.139 -250.183 -237.600 -230.232 -230.232
RMSE 1.76 1.68 1.64 1.64 0.59 0.56 0.55 0.55
# Flat list of all eight models for a single combined coefficient plot.
models <- list(
  "model_sum_1st_factors" = mod_sum_1st,
  "model_sum_1st_2nd_factors" = mod_sum_12,
  "model_sum_score" = mod_sum,
  "model_sum_score_norm" = mod_sum_norm,
  "model_mean_1st_factors" = mod_mean_1st,
  "model_mean_1st_2nd_factors" = mod_mean_12,
  "model_mean_score" = mod_mean,
  "model_mean_score_norm" = mod_mean_norm
)

# Dotted vertical line marks zero effect for easy visual comparison.
modelplot(models, coef_omit = 'Intercept') +
  geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
  ggtitle("Comparing Model Parameter Estimates", "Across Covariates")

Significant Coefficients

# Colour coefficients by significance at the 0.1% level; the legend is
# suppressed on the left panel so it appears only once in the combined plot.
g1 <- modelplot(mod_sum, coef_omit = 'Intercept') +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(mod_mean, coef_omit = 'Intercept') +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)

Aggregate Driver Scores

# Parcel scores: average each construct's three items into a single
# aggregate driver, then regress the outcomes on the four parcel means.
df$se_acad_mean <- rowMeans(df[c('se_acad_p1', 'se_acad_p2', 'se_acad_p3')])
df$se_social_mean <- rowMeans(df[c('se_social_p1', 'se_social_p2', 'se_social_p3')])
df$sup_friends_mean <- rowMeans(df[c('sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3')])
df$sup_parents_mean <- rowMeans(df[c('sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3')])

formula_parcel_mean <- "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

formula_parcel_sum <- "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

mod_sum_parcel <- lm(formula_parcel_sum, df)
mod_mean_parcel <- lm(formula_parcel_mean, df)

models_parcel <- list(
  "model_sum_score" = mod_sum_parcel,
  "model_mean_score" = mod_mean_parcel
)

modelsummary(models_parcel, stars = TRUE)
tinytable_or59mjdhizio3l7k66t7
model_sum_score model_mean_score
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 4.378*** 1.459***
(1.036) (0.345)
se_acad_mean 0.252 0.084
(0.176) (0.059)
se_social_mean 1.205*** 0.402***
(0.195) (0.065)
sup_friends_mean 0.049 0.016
(0.108) (0.036)
sup_parents_mean 0.685*** 0.228***
(0.111) (0.037)
Num.Obs. 283 283
R2 0.397 0.397
R2 Adj. 0.388 0.388
AIC 1105.3 483.5
BIC 1127.1 505.3
Log.Lik. -546.638 -235.731
RMSE 1.67 0.56
# Significance-coloured coefficient plots for the parcel models; legend
# shown only on the right panel.
g1 <- modelplot(mod_sum_parcel, coef_omit = 'Intercept') +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(mod_mean_parcel, coef_omit = 'Intercept') +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)

Hierarchical Models

# Hierarchical versions of the full models: random intercept by region.
formula_hierarchy_mean <- "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3  + (1 | region)"

formula_hierarchy_sum <- "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"

hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)

# re.form = NA plots fixed effects only (random effects excluded).
g1 <- modelplot(hierarchical_mean_fit, re.form = NA) +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(hierarchical_sum_fit, re.form = NA) +
  aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
  scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

# Side-by-side summary of both hierarchical fits; coefficient rows (2:25)
# highlighted in teal.
modelsummary(
  list(
    "hierarchical_mean_fit" = hierarchical_mean_fit,
    "hierarchical_sum_fit" = hierarchical_sum_fit
  ),
  stars = TRUE
) |>
  style_tt(
    i = 2:25,
    j = 1:1,
    background = "#17C2AD",
    color = "white",
    italic = TRUE
  )
tinytable_ftl5t6kstw3adyosicbz
hierarchical_mean_fit hierarchical_sum_fit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.903* 2.710*
(0.385) (1.154)
sup_parents_p1 -0.007 -0.021
(0.053) (0.160)
sup_parents_p2 0.070 0.210
(0.053) (0.159)
sup_parents_p3 0.160*** 0.480***
(0.046) (0.139)
sup_friends_p1 -0.091+ -0.274+
(0.055) (0.166)
sup_friends_p2 0.125* 0.376*
(0.059) (0.177)
sup_friends_p3 0.024 0.072
(0.048) (0.144)
se_acad_p1 -0.041 -0.122
(0.071) (0.213)
se_acad_p2 0.131+ 0.393+
(0.074) (0.223)
se_acad_p3 0.039 0.116
(0.067) (0.202)
se_social_p1 0.094 0.283
(0.075) (0.224)
se_social_p2 0.191* 0.574*
(0.081) (0.244)
se_social_p3 0.130* 0.389*
(0.059) (0.178)
SD (Intercept region) 0.160 0.481
SD (Observations) 0.549 1.648
Num.Obs. 283 283
R2 Marg. 0.424 0.424
R2 Cond. 0.469 0.469
AIC 538.4 1131.6
BIC 593.1 1186.3
ICC 0.1 0.1
RMSE 0.54 1.61

Marginal Effects

# Counterfactual predictions on a grid: vary sup_parents_p3 over 1:10 within
# each region for the hierarchical model (re.form = NA excludes random
# effects), and sup_parents_p2 over 1:10 for the plain regression. Other
# covariates are held at datagrid() defaults (their means).
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))

pred1 |> head() |> kableExtra::kable()
rowid estimate std.error statistic p.value s.value conf.low conf.high se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p3 sup_parents_p2 ls_mean
1 5.232276 0.2673967 19.56747 0 280.8136 4.708188 5.756363 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 1 6.576815
2 5.287553 0.2140733 24.69973 0 445.0319 4.867977 5.707128 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 2 6.576815
3 5.342829 0.1610972 33.16525 0 798.8130 5.027085 5.658574 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 3 6.576815
4 5.398106 0.1089765 49.53461 0 Inf 5.184516 5.611696 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 4 6.576815
5 5.453383 0.0599835 90.91472 0 Inf 5.335818 5.570949 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 5 6.576815
6 5.508660 0.0334480 164.69327 0 Inf 5.443103 5.574217 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 6 6.576815

Regression Marginal Effects

# Counterfactual response curves for three predictors of the plain
# regression model, holding all other covariates fixed.
g <- plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") +
  ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 <- plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") +
  ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 <- plot_predictions(mod_mean, condition = c("se_acad_p1"), type = "response") +
  ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g, g1, g2, ncol = 1, nrow = 3)

Hierarchical Marginal Effects

# Same counterfactual curves for the hierarchical model, split by region;
# re.form = NA restricts predictions to the fixed effects.
g <- plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form = NA) +
  ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 <- plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form = NA) +
  ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 <- plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"), type = "response", re.form = NA) +
  ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g, g1, g2, ncol = 1, nrow = 3)

Confirmatory Factor Analysis

# Baseline measurement model: five correlated latent factors with three
# indicators each (lavaan fixes each factor's first loading to 1 by default).
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3
"

# Constrained variant: the three SUP_Friends loadings are forced equal
# (a1 == a2 == a3), yielding a nested model testable against the baseline.
model_measurement1 <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ a1*sup_friends_p1 + a2*sup_friends_p2 + a3*sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

a1 == a2 
a1 == a3
"

fit_mod <- cfa(model_measurement, data = df)
fit_mod_1 <- cfa(model_measurement1, data = df)

cfa_models <- list(
  "full_measurement_model" = fit_mod,
  "measurement_model_reduced" = fit_mod_1
)
modelplot(cfa_models)

summary(fit_mod, fit.measures = TRUE, standardized = TRUE)
lavaan 0.6-18 ended normally after 46 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        40

  Number of observations                           283

Model Test User Model:
                                                      
  Test statistic                               207.137
  Degrees of freedom                                80
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2586.651
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.949
  Tucker-Lewis Index (TLI)                       0.933

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -4394.212
  Loglikelihood unrestricted model (H1)      -4290.643
                                                      
  Akaike (AIC)                                8868.423
  Bayesian (BIC)                              9014.241
  Sample-size adjusted Bayesian (SABIC)       8887.401

Root Mean Square Error of Approximation:

  RMSEA                                          0.075
  90 Percent confidence interval - lower         0.062
  90 Percent confidence interval - upper         0.088
  P-value H_0: RMSEA <= 0.050                    0.001
  P-value H_0: RMSEA >= 0.080                    0.263

Standardized Root Mean Square Residual:

  SRMR                                           0.056

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents =~                                                        
    sup_parents_p1    1.000                               0.938    0.876
    sup_parents_p2    1.034    0.056   18.570    0.000    0.970    0.888
    sup_parents_p3    0.988    0.059   16.637    0.000    0.927    0.811
  SUP_Friends =~                                                        
    sup_friends_p1    1.000                               1.021    0.906
    sup_friends_p2    0.793    0.043   18.466    0.000    0.809    0.857
    sup_friends_p3    0.891    0.050   17.735    0.000    0.910    0.831
  SE_Academic =~                                                        
    se_acad_p1        1.000                               0.697    0.880
    se_acad_p2        0.806    0.049   16.278    0.000    0.561    0.819
    se_acad_p3        0.952    0.058   16.491    0.000    0.663    0.828
  SE_Social =~                                                          
    se_social_p1      1.000                               0.640    0.846
    se_social_p2      0.959    0.055   17.338    0.000    0.614    0.881
    se_social_p3      0.926    0.066   13.956    0.000    0.593    0.742
  LS =~                                                                 
    ls_p1             1.000                               0.677    0.729
    ls_p2             0.820    0.078   10.488    0.000    0.555    0.762
    ls_p3             0.744    0.109    6.809    0.000    0.504    0.459

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents ~~                                                        
    SUP_Friends       0.134    0.064    2.094    0.036    0.140    0.140
    SE_Academic       0.219    0.046    4.717    0.000    0.335    0.335
    SE_Social         0.284    0.046    6.239    0.000    0.473    0.473
    LS                0.360    0.056    6.455    0.000    0.567    0.567
  SUP_Friends ~~                                                        
    SE_Academic       0.071    0.047    1.493    0.135    0.100    0.100
    SE_Social         0.195    0.046    4.262    0.000    0.299    0.299
    LS                0.184    0.053    3.500    0.000    0.267    0.267
  SE_Academic ~~                                                        
    SE_Social         0.274    0.036    7.525    0.000    0.613    0.613
    LS                0.212    0.039    5.407    0.000    0.449    0.449
  SE_Social ~~                                                          
    LS                0.330    0.043    7.650    0.000    0.761    0.761

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .sup_parents_p1    0.268    0.037    7.143    0.000    0.268    0.233
   .sup_parents_p2    0.253    0.038    6.595    0.000    0.253    0.212
   .sup_parents_p3    0.446    0.048    9.262    0.000    0.446    0.342
   .sup_friends_p1    0.228    0.040    5.663    0.000    0.228    0.179
   .sup_friends_p2    0.237    0.030    7.926    0.000    0.237    0.266
   .sup_friends_p3    0.371    0.042    8.811    0.000    0.371    0.310
   .se_acad_p1        0.141    0.022    6.487    0.000    0.141    0.226
   .se_acad_p2        0.154    0.018    8.648    0.000    0.154    0.329
   .se_acad_p3        0.202    0.024    8.397    0.000    0.202    0.315
   .se_social_p1      0.163    0.020    8.095    0.000    0.163    0.284
   .se_social_p2      0.109    0.016    6.782    0.000    0.109    0.224
   .se_social_p3      0.287    0.028   10.141    0.000    0.287    0.450
   .ls_p1             0.404    0.048    8.422    0.000    0.404    0.469
   .ls_p2             0.223    0.029    7.624    0.000    0.223    0.420
   .ls_p3             0.951    0.085   11.123    0.000    0.951    0.789
    SUP_Parents       0.880    0.098    8.937    0.000    1.000    1.000
    SUP_Friends       1.042    0.111    9.405    0.000    1.000    1.000
    SE_Academic       0.485    0.054    8.908    0.000    1.000    1.000
    SE_Social         0.410    0.048    8.459    0.000    1.000    1.000
    LS                0.458    0.072    6.323    0.000    1.000    1.000
# Compare the observed sample covariances with the model-implied covariances.
g1 <- plot_heatmap(cov(df[, drivers]))

g2 <- plot_heatmap(
  data.frame(fitted(fit_mod)$cov),
  title = "Model Implied Covariances",
  subtitle = "Fitted Values"   # named for clarity; was a bare positional arg
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

Modification Indices

# Largest modification indices: candidate local misfits (cross-loadings or
# residual covariances) whose freeing would most reduce the chi-square.
modindices(fit_mod, sort = TRUE, maximum.number = 6, standardized = FALSE, minimum.value = 5)
               lhs op            rhs     mi    epc
152 sup_friends_p1 ~~   se_social_p3 19.245  0.095
68     SUP_Friends =~          ls_p2 17.491  0.181
64     SUP_Friends =~   se_social_p1 17.210 -0.136
60     SUP_Friends =~ sup_parents_p3 16.063 -0.187
210          ls_p2 ~~          ls_p3 13.931 -0.140
57     SUP_Parents =~          ls_p3 13.057  0.329

Summary Global Fit Measures

# Global fit indices for the full and constrained (reduced) CFA models.
# The measure list is defined once rather than duplicated per call.
fit_stats <- c("chisq", "baseline.chisq", "cfi", "aic", "bic", "rmsea", "srmr")
summary_df <- cbind(
  fitMeasures(fit_mod, fit_stats),
  fitMeasures(fit_mod_1, fit_stats)
)
colnames(summary_df) <- c('Full Model', 'Reduced Model')

summary_df
                  Full Model Reduced Model
chisq           207.13746909  225.18095274
baseline.chisq 2586.65112811 2586.65112811
cfi               0.94876900    0.94230416
aic            8868.42313715 8882.46662079
bic            9014.24101305 9020.99360290
rmsea             0.07493739    0.07854933
srmr              0.05595908    0.05904842
# Path diagrams: raw estimates for the full model, standardized loadings
# for the constrained model.
semPlot::semPaths(fit_mod, whatLabels = 'est', intercepts = FALSE)

semPlot::semPaths(fit_mod_1, whatLabels = 'std', intercepts = FALSE)

Comparing the empirical and implied variance-covariance matrix

# Standardized residual covariances (observed minus model-implied) for both
# CFA models. Reuses plot_heatmap() instead of rebuilding the identical
# ggplot pipeline inline twice.
g1 <- plot_heatmap(
  data.frame(resid(fit_mod, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Full Model"
)

g2 <- plot_heatmap(
  data.frame(resid(fit_mod_1, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit: Reduced Model"
)

plot <- ggarrange(g1, g2, ncol = 1, nrow = 2)

# Chi-square test of the full model against the saturated model.
anova(fit_mod)
Chi-Squared Test Statistic (unscaled)

          Df    AIC    BIC  Chisq Chisq diff Df diff         Pr(>Chisq)    
Saturated  0                 0.00                                          
Model     80 8868.4 9014.2 207.14     207.14      80 0.0000000000003209 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-square test of the constrained model against the saturated model.
anova(fit_mod_1)
Chi-Squared Test Statistic (unscaled)

          Df    AIC  BIC  Chisq Chisq diff Df diff           Pr(>Chisq)    
Saturated  0               0.00                                            
Model     82 8882.5 9021 225.18     225.18      82 0.000000000000002776 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-square difference test between the nested models (full vs constrained).
anova(fit_mod, fit_mod_1)

Chi-Squared Difference Test

          Df    AIC    BIC  Chisq Chisq diff   RMSEA Df diff Pr(>Chisq)    
fit_mod   80 8868.4 9014.2 207.14                                          
fit_mod_1 82 8882.5 9021.0 225.18     18.044 0.16836       2  0.0001208 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Structural Equation Models

# Full SEM: the CFA measurement model plus a structural regression of the
# life-satisfaction factor (LS) on the four driver factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

# Structural model 
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends

# Residual covariances
SE_Academic ~~ SE_Social
"

fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod_sem)

# Path diagram with standardized estimates.
semPlot::semPaths(fit_mod_sem, whatLabels = 'std', intercepts = FALSE)

# Standardized residual covariances for the structural model; reuses
# plot_heatmap() rather than duplicating the inline ggplot pipeline.
plot_heatmap(
  data.frame(resid(fit_mod_sem, type = "standardized")$cov),
  title = "Residuals of the Sample Covariances and Model Implied Covariances",
  subtitle = "A Visual Check of Model fit"
)

Confirmatory Factor Models with PyMC

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pytensor import tensor as pt
import arviz as az
import networkx as nx



# Load the whitespace-delimited indicator data for the two-factor CFA.
# Fixed: 'sep' is a regular expression, so use a raw string — '\s' in a
# plain string literal is an invalid escape sequence (DeprecationWarning
# in Python >= 3.12, SyntaxWarning later).
df_p = pd.read_csv('IIS.dat', sep=r'\s+')
df_p.head()
     PI    AD   IGC   FI   FC
0  4.00  3.38  4.67  2.6  3.2
1  2.57  3.00  3.50  2.4  2.8
2  2.29  3.29  4.83  2.0  3.4
3  2.43  3.63  4.33  3.6  3.8
4  3.00  4.00  4.83  3.4  3.8
# Coordinate labels for the two-factor CFA: indicators PI/AD/IGC load on
# 'Student' and FI/FC on 'Faculty'. Each factor's first loading is later
# fixed to 1 for identification.
coords = {'obs': list(range(len(df_p))), 
          'indicators': ['PI', 'AD',    'IGC', 'FI', 'FC'],
          'indicators_1': ['PI', 'AD',  'IGC'],
          'indicators_2': ['FI', 'FC'],
          'latent': ['Student', 'Faculty']
          }


obs_idx = list(range(len(df_p)))
with pm.Model(coords=coords) as model:
  
  # Per-indicator error scale, used below as the likelihood's sigma.
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  # Factor loadings. 'lambdas_' is deliberately reused: each raw Normal
  # vector is immediately wrapped in a Deterministic that pins the first
  # loading to 1 (set_subtensor) so the latent scale is identified.
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  # Per-indicator intercepts.
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  # Latent factor scores: zero-mean (kappa = 0) MvNormal with an LKJ prior
  # on the 2x2 factor covariance; compute_corr=True also registers the
  # correlation matrix as 'chol_cov_corr'.
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=2)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=2, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  # Model-implied mean per indicator: intercept + factor score * loading.
  # m1-m3 load on factor 0 ('Student'); m4-m5 on factor 1 ('Faculty').
  m1 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m2 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m3 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m4 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m5 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  
  # Stack to an (obs, indicators) mean matrix matching df_p's layout.
  mu = pm.Deterministic('mu', pm.math.stack([m1, m2, m3, m4, m5]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df_p.values)

  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
  
# Posterior summary for the key parameters; factor scores 'ksi' are
# restricted to observations 0 and 7 to keep the table readable.
summary_df = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi', 'chol_cov_corr'], coords= {'obs': [0, 7]})

PyMC Confirmatory Factor Model

# Render the PyMC CFA posterior summary (pulled from Python via reticulate).
kable(py$summary_df)
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[PI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[AD] 0.903 0.066 0.786 1.029 0.004 0.003 340 524 1.01
lambdas1[IGC] 0.538 0.046 0.454 0.628 0.002 0.002 473 741 1.00
lambdas2[FI] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[FC] 0.979 0.056 0.871 1.082 0.003 0.002 435 724 1.01
tau[PI] 3.335 0.036 3.269 3.403 0.001 0.001 631 1262 1.00
tau[AD] 3.900 0.025 3.853 3.950 0.001 0.001 423 941 1.00
tau[IGC] 4.597 0.021 4.558 4.636 0.001 0.001 702 1364 1.00
tau[FI] 3.037 0.039 2.967 3.112 0.002 0.001 474 1168 1.00
tau[FC] 3.716 0.034 3.652 3.782 0.002 0.001 389 842 1.00
Psi[PI] 0.610 0.024 0.564 0.655 0.001 0.000 1370 2631 1.00
Psi[AD] 0.317 0.020 0.281 0.353 0.001 0.001 679 1551 1.00
Psi[IGC] 0.355 0.013 0.332 0.380 0.000 0.000 2467 3095 1.00
Psi[FI] 0.570 0.025 0.521 0.616 0.001 0.001 1143 1769 1.00
Psi[FC] 0.421 0.026 0.375 0.471 0.001 0.001 804 1246 1.01
ksi[0, Student] -0.226 0.225 -0.649 0.207 0.004 0.003 3803 3018 1.00
ksi[0, Faculty] -0.369 0.272 -0.859 0.144 0.004 0.003 4030 2898 1.00
ksi[7, Student] 0.876 0.228 0.467 1.313 0.004 0.003 2576 2864 1.00
ksi[7, Faculty] 0.864 0.272 0.346 1.354 0.004 0.003 4126 3083 1.00
chol_cov_corr[0, 0] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
chol_cov_corr[0, 1] 0.852 0.028 0.801 0.905 0.001 0.001 474 493 1.01
chol_cov_corr[1, 0] 0.852 0.028 0.801 0.905 0.001 0.001 474 493 1.01
chol_cov_corr[1, 1] 1.000 0.000 1.000 1.000 0.000 0.000 3890 4000 1.00
# Trace plots for the sampled parameters; the trailing ';' suppresses the
# returned axes array in the rendered output.
az.plot_trace(idata, var_names=['lambdas1', 'lambdas2', 'tau', 'Psi', 'ksi']);
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")
/Users/nathanielforde/mambaforge/envs/pymc_causal/lib/python3.11/site-packages/arviz/stats/density_utils.py:487: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")

# Load the survey data and declare the five measurement constructs.
df = pd.read_csv('sem_data.csv')

# One indicator group per latent construct, in model order.
construct_indicators = {
    'indicators_1': ['se_acad_p1', 'se_acad_p2', 'se_acad_p3'],
    'indicators_2': ['se_social_p1', 'se_social_p2', 'se_social_p3'],
    'indicators_3': ['sup_friends_p1', 'sup_friends_p2', 'sup_friends_p3'],
    'indicators_4': ['sup_parents_p1', 'sup_parents_p2', 'sup_parents_p3'],
    'indicators_5': ['ls_p1', 'ls_p2', 'ls_p3'],
}

# Flat list of every observed indicator column, preserving construct order.
drivers = [col for group in construct_indicators.values() for col in group]

latent_names = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']

# 'latent1' duplicates the latent labels so a square (latent, latent1)
# covariance matrix can carry two distinct named dims.
coords = {
    'obs': list(range(len(df))),
    'indicators': drivers,
    **construct_indicators,
    'latent': latent_names,
    'latent1': latent_names,
}

# Five-factor CFA mirroring the lavaan measurement model above: each of the
# 15 indicators loads on exactly one latent factor, and the factors covary
# through an LKJ-distributed 5x5 covariance matrix.
obs_idx = list(range(len(df)))
with pm.Model(coords=coords) as model:
  
  # Per-indicator error scale, used below as the likelihood's sigma.
  Psi = pm.InverseGamma('Psi', 5, 10, dims='indicators')
  # Factor loadings. 'lambdas_' is deliberately reused: each raw Normal
  # vector is immediately wrapped in a Deterministic that pins the first
  # loading of its factor to 1 (set_subtensor) for identification.
  lambdas_ = pm.Normal('lambdas_1', 1, 10, dims=('indicators_1'))
  lambdas_1 = pm.Deterministic('lambdas1', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_1'))
  lambdas_ = pm.Normal('lambdas_2', 1, 10, dims=('indicators_2'))
  lambdas_2 = pm.Deterministic('lambdas2', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_2'))
  lambdas_ = pm.Normal('lambdas_3', 1, 10, dims=('indicators_3'))
  lambdas_3 = pm.Deterministic('lambdas3', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_3'))
  lambdas_ = pm.Normal('lambdas_4', 1, 10, dims=('indicators_4'))
  lambdas_4 = pm.Deterministic('lambdas4', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_4'))
  lambdas_ = pm.Normal('lambdas_5', 1, 10, dims=('indicators_5'))
  lambdas_5 = pm.Deterministic('lambdas5', pt.set_subtensor(lambdas_[0], 1), dims=('indicators_5'))
  # Per-indicator intercepts.
  tau = pm.Normal('tau', 3, 10, dims='indicators')
  # Latent factor scores: zero-mean (kappa = 0) MvNormal; the 5x5 factor
  # covariance is exposed as 'cov' and the correlations as 'chol_cov_corr'.
  kappa = 0
  sd_dist = pm.Exponential.dist(1.0, shape=5)
  chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=5, eta=2,
    sd_dist=sd_dist, compute_corr=True)
  cov = pm.Deterministic("cov", chol.dot(chol.T), dims=('latent', 'latent1'))
  ksi = pm.MvNormal('ksi', kappa, chol=chol, dims=('obs', 'latent'))

  # Model-implied mean per indicator: intercept + factor score * loading.
  # Blocks of three indicators map to factors 0..4 in 'latent' order.
  m0 = tau[0] + ksi[obs_idx, 0]*lambdas_1[0]
  m1 = tau[1] + ksi[obs_idx, 0]*lambdas_1[1]
  m2 = tau[2] + ksi[obs_idx, 0]*lambdas_1[2]
  m3 = tau[3] + ksi[obs_idx, 1]*lambdas_2[0]
  m4 = tau[4] + ksi[obs_idx, 1]*lambdas_2[1]
  m5 = tau[5] + ksi[obs_idx, 1]*lambdas_2[2]
  m6 = tau[6] + ksi[obs_idx, 2]*lambdas_3[0]
  m7 = tau[7] + ksi[obs_idx, 2]*lambdas_3[1]
  m8 = tau[8] + ksi[obs_idx, 2]*lambdas_3[2]
  m9 = tau[9] + ksi[obs_idx, 3]*lambdas_4[0]
  m10 = tau[10] + ksi[obs_idx, 3]*lambdas_4[1]
  m11 = tau[11] + ksi[obs_idx, 3]*lambdas_4[2]
  m12 = tau[12] + ksi[obs_idx, 4]*lambdas_5[0]
  m13 = tau[13] + ksi[obs_idx, 4]*lambdas_5[1]
  m14 = tau[14] + ksi[obs_idx, 4]*lambdas_5[2]
  
  # Stack to an (obs, indicators) mean matrix matching df[drivers]'s layout.
  mu = pm.Deterministic('mu', pm.math.stack([m0, m1, m2, m3, m4, m5, m6, m7,
                                             m8, m9, m10, m11, m12, m13, m14]).T)
  _  = pm.Normal('likelihood', mu, Psi, observed=df[drivers].values)

  idata = pm.sample(nuts_sampler='numpyro', target_accept=.95)
  
# Posterior summary of the measurement parameters (factor scores omitted).
summary_df1 = az.summary(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5', 'tau', 'Psi'])

# Posterior-mean covariance and correlation matrices between the five
# latent factors, labelled for display.
factor_names = ['SE_ACAD', 'SE_SOCIAL', 'SUP_F', 'SUP_P', 'LS']

# az.extract flattens the draws; averaging over the last axis (presumably
# the stacked sample dimension) yields a 5x5 posterior-mean matrix.
cov_draws = az.extract(idata['posterior'])['cov']
cov_df = pd.DataFrame(cov_draws.mean(axis=2))
cov_df.index = factor_names
cov_df.columns = factor_names

corr_draws = az.extract(idata['posterior'])['chol_cov_corr']
correlation_df = pd.DataFrame(corr_draws.mean(axis=2))
correlation_df.index = factor_names
correlation_df.columns = factor_names
# Render the five-factor model's posterior summary from Python.
kable(py$summary_df1)
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
lambdas1[se_acad_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas1[se_acad_p2] 0.818 0.054 0.712 0.914 0.002 0.001 903 1627 1.01
lambdas1[se_acad_p3] 0.969 0.061 0.854 1.081 0.002 0.001 925 1255 1.01
lambdas2[se_social_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas2[se_social_p2] 0.960 0.059 0.852 1.074 0.002 0.002 725 1398 1.00
lambdas2[se_social_p3] 0.937 0.073 0.796 1.072 0.002 0.002 1030 2140 1.00
lambdas3[sup_friends_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas3[sup_friends_p2] 0.801 0.045 0.716 0.886 0.001 0.001 1107 1774 1.00
lambdas3[sup_friends_p3] 0.904 0.052 0.800 1.000 0.001 0.001 1234 2111 1.00
lambdas4[sup_parents_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas4[sup_parents_p2] 1.039 0.058 0.932 1.145 0.002 0.001 907 1546 1.00
lambdas4[sup_parents_p3] 1.007 0.063 0.894 1.127 0.002 0.001 1063 1654 1.00
lambdas5[ls_p1] 1.000 0.000 1.000 1.000 0.000 0.000 4000 4000 NaN
lambdas5[ls_p2] 0.790 0.081 0.638 0.940 0.003 0.002 551 1113 1.01
lambdas5[ls_p3] 0.990 0.099 0.801 1.171 0.004 0.003 519 1369 1.01
tau[se_acad_p1] 5.155 0.049 5.064 5.245 0.003 0.002 361 788 1.01
tau[se_acad_p2] 5.347 0.042 5.264 5.422 0.002 0.002 389 921 1.01
tau[se_acad_p3] 5.211 0.049 5.119 5.303 0.002 0.002 393 914 1.01
tau[se_social_p1] 5.291 0.045 5.209 5.379 0.002 0.002 341 611 1.01
tau[se_social_p2] 5.479 0.041 5.401 5.556 0.002 0.002 321 648 1.01
tau[se_social_p3] 5.442 0.049 5.352 5.534 0.002 0.002 450 849 1.00
tau[sup_friends_p1] 5.786 0.066 5.659 5.902 0.003 0.002 372 777 1.01
tau[sup_friends_p2] 6.010 0.055 5.913 6.115 0.003 0.002 391 942 1.01
tau[sup_friends_p3] 5.991 0.065 5.870 6.114 0.003 0.002 415 1011 1.01
tau[sup_parents_p1] 5.975 0.063 5.856 6.090 0.003 0.002 395 926 1.00
tau[sup_parents_p2] 5.926 0.064 5.811 6.054 0.003 0.002 386 855 1.00
tau[sup_parents_p3] 5.719 0.068 5.595 5.852 0.003 0.002 446 921 1.00
tau[ls_p1] 5.193 0.056 5.094 5.304 0.002 0.002 505 1251 1.00
tau[ls_p2] 5.778 0.044 5.699 5.860 0.002 0.001 541 1289 1.00
tau[ls_p3] 5.223 0.052 5.122 5.319 0.002 0.002 469 761 1.00
Psi[se_acad_p1] 0.412 0.028 0.364 0.467 0.001 0.001 1271 2427 1.00
Psi[se_acad_p2] 0.413 0.023 0.367 0.455 0.000 0.000 2571 2655 1.00
Psi[se_acad_p3] 0.467 0.028 0.419 0.522 0.001 0.000 1976 2777 1.00
Psi[se_social_p1] 0.430 0.026 0.381 0.477 0.001 0.000 1511 1986 1.00
Psi[se_social_p2] 0.362 0.025 0.315 0.407 0.001 0.000 1211 1534 1.00
Psi[se_social_p3] 0.554 0.028 0.504 0.610 0.001 0.000 2475 2428 1.00
Psi[sup_friends_p1] 0.514 0.040 0.440 0.590 0.001 0.001 873 1159 1.00
Psi[sup_friends_p2] 0.508 0.031 0.447 0.562 0.001 0.001 1508 2500 1.00
Psi[sup_friends_p3] 0.625 0.036 0.564 0.697 0.001 0.001 2056 2894 1.00
Psi[sup_parents_p1] 0.551 0.035 0.488 0.619 0.001 0.001 1442 2421 1.00
Psi[sup_parents_p2] 0.535 0.037 0.466 0.603 0.001 0.001 1418 2420 1.00
Psi[sup_parents_p3] 0.675 0.039 0.603 0.749 0.001 0.001 1921 2311 1.00
Psi[ls_p1] 0.671 0.037 0.600 0.742 0.001 0.001 1168 2521 1.00
Psi[ls_p2] 0.533 0.031 0.474 0.590 0.001 0.001 1471 2421 1.00
Psi[ls_p3] 0.623 0.036 0.555 0.689 0.001 0.001 1512 1851 1.00

# Forest plot comparing the factor-loading posteriors across all five
# constructs; combined=True pools the chains into one interval per loading.
az.plot_forest(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3', 'lambdas4', 'lambdas5'], combined=True);

# Render the posterior-mean latent covariance matrix.
kable(py$cov_df)
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 0.4706940 0.2603813 0.0631096 0.2027756 0.2216762
SE_SOCIAL 0.2603813 0.3974050 0.1868705 0.2677145 0.3040071
SUP_F 0.0631096 0.1868705 1.0358624 0.1195301 0.1642341
SUP_P 0.2027756 0.2677145 0.1195301 0.8623949 0.3841457
LS 0.2216762 0.3040071 0.1642341 0.3841457 0.4221140
# Render the posterior-mean latent correlation matrix.
kable(py$correlation_df)
SE_ACAD SE_SOCIAL SUP_F SUP_P LS
SE_ACAD 1.0000000 0.6025576 0.0903843 0.3184307 0.4987059
SE_SOCIAL 0.6025576 1.0000000 0.2916484 0.4574083 0.7448812
SUP_F 0.0903843 0.2916484 1.0000000 0.1264455 0.2493548
SUP_P 0.3184307 0.4574083 0.1264455 1.0000000 0.6389961
LS 0.4987059 0.7448812 0.2493548 0.6389961 1.0000000

Citation

BibTeX citation:
@online{forde,
  author = {Nathaniel Forde},
  title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
    {Models}},
  langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural Equation Models.”